---
title: "Replication Script for: Pay to Play? How Reducing APSA Division Fees Increases Graduate Student Participation"
author: "Bailey R. Fairbanks, Fabian G. Neuner, Isabel M. Perera, and Christine M. Slaughter"
date: "2/24/2021 (first version: 11/21/2019)"
output: pdf_document
editor_options: 
  chunk_output_type: console
---

# Loading Packages and Data


The data on dues and enrollment for each of APSA’s 44 Divisions from 2017-2019 (see Division_Fee_Data.csv) were provided by the APSA Department of Research and Development. 

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

#libraries
library(haven)
library(foreign)
library(ggplot2)
library(gridExtra)
library(tidyverse)
library(recoder)
library(readxl)
library(stringr)
library(stargazer)
library(did)

# load data
data <- read.csv("Division_Fee_Data.csv")

```

# Setting up the Data

Some sections report online and print separately so we delete the corresponding rows so that we only have one entry for each section in each year.

```{r}

# Check Divisions
#table(data$section_name)

# Delete Online Only and Online + Reg so that one row per Division
data <- data[data$online_only==0,] # Deleting ONline Only
data <- data[data$online_plus_reg==0,] # Deleting Online + Regular 
data <- data[data$section_num!=c(48,49),] # Deleting new sections (no baseline data)

# Create Treated Indicator (1= if Division reduced fees)
data$treated2018 <- 0
data$treated2018[data$sdues_2018<data$sdues_2017] <-1
table(data$treated2018)
data$treated2019 <- 0
data$treated2019[data$sdues_2019<data$sdues_2017] <-1
table(data$treated2019)
data$treated2019_only <- 0
data$treated2019_only[data$sdues_2019<data$sdues_2018] <-1
table(data$treated2019_only)


# Create Variable for Fees already at Zero
data$already_at_zero <- 0
data$already_at_zero[data$sdues_2017==0] <- 1
table(data$already_at_zero) # 4 sections already at zero


# Change Data to Long Format
data_long <- gather(data, year, membership, student_aug2017:student_sep2019,factor_key=TRUE)

# Creating Year Variable
numextract <- function(string){ 
  str_extract(string, "\\-*\\d+\\.*\\d*")
}

data_long$year <- numextract(data_long$year)
data_long$year <- as.numeric(data_long$year)


# Time / Treatment Indicator:

data_long$time = ifelse(data_long$year >= 2018, 1, 0)
data_long$time2 = ifelse(data_long$year >= 2019, 1, 0)

data_long$treated_18 <- recoder(data_long$treated2018, '0:"No"; 1:"Yes"')
data_long$treated_19 <- recoder(data_long$treated2019, '0:"No"; 1:"Yes"')
data_long$treated_19only <- recoder(data_long$treated2019_only, '0:"No"; 1:"Yes"')

# Creating separate datasets for 2017-2018 and 2018-2019

data_long_1718 <- data_long[data_long$year<=2018,] # 2017-2018
data_long_1819 <- data_long[data_long$year>2017,] # 2018-2019

data_long$year <- as.factor(data_long$year)
data_long_1718$year <- as.factor(data_long_1718$year)
data_long_1819$year <- as.factor(data_long_1819$year)



```


# Manuscript Figures

## Figure 1. Distribution of Graduate Student Membership Fees across APSA Divisions, 2017-2019 

Figure 1 presents an overview of the distribution of APSA Division Graduate Student Membership Fees across the three years under examination.

```{r}

# Create Categorical Section Fees Variable
data_long$fees_category <- NA
data_long$fees_category[data_long$sdues_2017 ==0 & data_long$year==2017] <- "No Fees ($0)"
data_long$fees_category[data_long$sdues_2017 >0 & data_long$sdues_2017 <=10 & data_long$year==2017] <- "$1-$10"
data_long$fees_category[data_long$sdues_2017 >10 & data_long$sdues_2017 <=20 & data_long$year==2017] <- "$11-$20"
data_long$fees_category[data_long$sdues_2017 >20 & data_long$year==2017] <- ">$20"
data_long$fees_category[data_long$sdues_2018 ==0 & data_long$year==2018] <- "No Fees ($0)"
data_long$fees_category[data_long$sdues_2018 >0 & data_long$sdues_2018 <=10 & data_long$year==2018] <- "$1-$10"
data_long$fees_category[data_long$sdues_2018 >10 & data_long$sdues_2018 <=20 & data_long$year==2018] <- "$11-$20"
data_long$fees_category[data_long$sdues_2018 >20  & data_long$year==2018] <- ">$20"
data_long$fees_category[data_long$sdues_2019 ==0 & data_long$year==2019] <- "No Fees ($0)"
data_long$fees_category[data_long$sdues_2019 >0 & data_long$sdues_2019 <=10 & data_long$year==2019] <- "$1-$10"
data_long$fees_category[data_long$sdues_2019 >10 & data_long$sdues_2019 <=20 & data_long$year==2019] <- "$11-$20"
data_long$fees_category[data_long$sdues_2019 >20 & data_long$year==2019] <- ">$20"

# Correct Ordering:
data_long$fees_category <- factor(data_long$fees_category, levels = c("No Fees ($0)", "$1-$10", "$11-$20", ">$20"))
table(data_long$fees_category)


ggplot(data_long, aes(fees_category, group = year,  fill=year)) +
  theme_bw(base_size = 15)+scale_fill_grey()+
  geom_bar(position=position_dodge()) +
  ylim(0, 30)+
  geom_text(stat='count', aes(label=..count..), position = position_dodge(0.9), vjust=-1)+
  labs(x="Division Fees", y = "Number of Divisions", fill ="Year") 





```

## Figure 2. Changes in Student Division Membership, by Treatment Status

```{r}

p1 <- ggplot(data_long_1718, aes(year, membership, group = treated_18)) +
  aes(linetype = treated_18) + stat_summary(geom = 'line', size=1.2)+
  aes(pointtype = treated_18) + stat_summary(geom = 'point', size=1.2)+
  scale_x_discrete(name="Year")+
  scale_y_continuous(name="Average Student Membership") +
  coord_cartesian(ylim=c(50, 400))+
  labs(linetype="Reduced Membership \nFees: 2017-2018") +
  theme_bw()+
  theme(legend.position = "bottom")+
  theme(legend.key.size = unit(1.5,"line"))+
  annotate("text", x = 0.85, y = 70, label = "69.9") +
  annotate("text", x = 0.85, y = 85, label = "84.8") +
  annotate("text", x = 2.2, y = 90, label = "90.2") +
  annotate("text", x = 2.2, y = 135, label = "134.5")

p2 <-ggplot(data_long_1819, aes(year, membership, group = treated_19only)) +
  aes(linetype = treated_19only) + stat_summary(geom = 'line', size=1.2)+
  aes(pointtype = treated_19only) + stat_summary(geom = 'point', size=1.2)+
  scale_x_discrete(name="Year")+
  scale_y_continuous(name="Average Student Membership") +
  coord_cartesian(ylim=c(50, 400))+
  labs(linetype="Reduced Membership \nFees: 2018-2019") +
  theme_bw()+
  theme(legend.position = "bottom")+
  theme(legend.key.size = unit(1.5,"line"))+
  annotate("text", x = 0.85, y = 113, label = "112.5") +
  annotate("text", x = 0.85, y = 96, label = "96.3") +
  annotate("text", x = 2.2, y = 159, label = "159.6") +
  annotate("text", x = 2.2, y = 377, label = "377.5")

p3 <- ggplot(data_long, aes(year, membership, group = treated_19)) +
  aes(linetype = treated_19) + stat_summary(geom = 'line', size=1.2)+
  aes(pointtype = treated_19) + stat_summary(geom = 'point', size=1.2)+
  scale_x_discrete(name="Year")+
  scale_y_continuous(name="Average Student Membership") +
  coord_cartesian(ylim=c(50, 400))+
  labs(linetype="Reduced Membership \nFees: 2017-2019") +
  theme_bw()+
  theme(legend.position = "bottom")+
  theme(legend.key.size = unit(1.5,"line"))+
  annotate("text", x = 0.82, y = 83, label = "79.5") +
  annotate("text", x = 0.82, y = 75, label = "77") +
  annotate("text", x = 3.2, y = 103, label = "103.6") +
  annotate("text", x = 3.2, y = 249, label = "248.7")


grid.arrange(p1, p2, p3, nrow=1)
```



# Supplementary Materials

## Supplementary Materials: Additional Descriptives

Here we 

```{r}

# Descriptives
mean(data_long_1718$sdues_2017) 
mean(data_long_1718$sdues_2018) 
mean(data_long_1718$sdues_2019) 

mean(data_long$membership) 
mean(data_long$membership[data_long$year==2017])
mean(data_long$membership[data_long$year==2018]) 
mean(data_long$membership[data_long$year==2019])  

mean(data_long$membership[data_long$year==2017 & data_long$treated_18=="No"])
mean(data_long$membership[data_long$year==2017 & data_long$treated_18=="Yes"])

mean(data_long$membership[data_long$year==2018 & data_long$treated_18=="No"])
mean(data_long$membership[data_long$year==2018 & data_long$treated_18=="Yes"])

mean(data_long$membership[data_long$year==2017 & data_long$treated_19=="No"])
mean(data_long$membership[data_long$year==2019 & data_long$treated_19=="Yes"])

mean(data_long$membership[data_long$year==2018 & data_long$treated_19only=="No"])
mean(data_long$membership[data_long$year==2018 & data_long$treated_19only=="Yes"])

mean(data_long$membership[data_long$year==2019 & data_long$treated_19only=="No"])
mean(data_long$membership[data_long$year==2019 & data_long$treated_19only=="Yes"])

```

# Supplementary Materials: Figure A1. 2017-2019 Trend for Those Treated 2018-2019

This Figure shows the pre-treatment trend (2017-2018) for the treatment and control groups used in the 2018-2019 analyses (see also Table A2).

```{r}
ggplot(data_long, aes(year, membership, group = treated_19only)) +
  aes(linetype = treated_19only) + stat_summary(geom = 'line', size=1.2)+
  aes(pointtype = treated_19only) + stat_summary(geom = 'point', size=1.2)+
  scale_x_discrete(name="Year")+
  scale_y_continuous(name="Average Student Membership") +
  labs(linetype="Reduced Membership \nFees: 2018-2019") +
  theme_bw()+
  theme(legend.position = "bottom")+
  theme(legend.key.size = unit(1.5,"line"))

```

## Supplementary Materials: Tables A1-A3

### Table A1

First, the difference-in-difference estimate of reducing fees between 2017 and 2018 on membership:

  
```{r}
model1 <- lm(membership ~ treated_18 + time + treated_18*time, data=data_long_1718)
model2 <- lm(membership ~ treated_18 + time + treated_18*time + already_at_zero, data=data_long_1718)
stargazer(model1, model2, type="text", out="did_table1.html")
#stargazer(model1, model2, type="html", out="did_table1.html")

```

### Table A2

Second, the difference-in-difference estimate of reducing fees between 2017 and 2019 on membership:
  
  
```{r}
model3 <- lm(membership ~ treated_19 + time2 + treated_19*time2, data=data_long)
model4 <- lm(membership ~ treated_19 + time2 + treated_19*time2+ already_at_zero, data=data_long)
stargazer(model3, model4, type="text", out="did_table2.html")
#stargazer(model3, model4, type="html", out="did_table2.html")

```

### Table A3

Third, the difference-in-difference estimate of reducing fees between 2018 and 2019 on membership:
  
  
```{r}
model5 <- lm(membership ~ treated_19only + time2 + treated_19only*time2, data=data_long_1819)
model6 <- lm(membership ~ treated_19only + time2 + treated_19only*time2+ already_at_zero, data=data_long_1819)
stargazer(model5, model6, type="text", out="did_table3.html")
#stargazer(model5, model6, type="html", out="did_table3.html")

```